package cn.sunxyz.spider.parser.impl;

import cn.sunxyz.spider.Page;
import cn.sunxyz.spider.parser.Parser;
import org.jsoup.nodes.Document;

import java.util.Set;
import java.util.stream.Collectors;

/**
 * {@link Parser} implementation that collects the link targets of every
 * anchor element in a page's document.
 *
 * <p>Created by yangrd on 2018/6/14
 */
public class UrlParser implements Parser<Set<String>> {

    /** CSS selector matching every anchor element in the document. */
    private static final String ANCHOR_SELECTOR = "a";

    /** jsoup attribute key; the {@code abs:} prefix requests the absolute form of the href. */
    private static final String ABSOLUTE_HREF = "abs:href";

    /**
     * Extracts the distinct, non-blank link targets from the given page.
     *
     * @param page the page whose document is scanned for anchors
     * @return the set of href values found; empty when the document has no usable links
     */
    @Override
    public Set<String> parser(Page page) {
        return getHrefSet(page.getDocument());
    }

    /**
     * Collects the {@code abs:href} value of every anchor in the document,
     * dropping values that are empty or whitespace-only.
     *
     * @param document the parsed document to scan
     * @return the distinct href values
     */
    private Set<String> getHrefSet(Document document) {
        // Sequential on purpose: the per-element work is a single attribute
        // lookup, so a parallel stream would only add fork/join overhead and
        // contend for the shared common pool.
        return document.select(ANCHOR_SELECTOR).stream()
                .map(anchor -> anchor.attr(ABSOLUTE_HREF))
                .filter(href -> !href.trim().isEmpty())
                .collect(Collectors.toSet());
    }
}
